import os
import re
import time
import random
import logging
import jieba
from toolbox import promote_file_to_downloadzone, update_ui, gen_time_str
from request_llms.bridge_all import predict_no_ui_long_connection
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

def optimized_fast_token_count(text):
    """
    Fast token-count estimate for mixed CJK/Latin text.

    Each CJK/kana/hangul character counts as one token, each
    whitespace-separated word containing ASCII letters/digits counts as
    one token, and each remaining non-space symbol (punctuation etc.)
    counts as one token.

    Bug fixed: the original "other symbols" step subtracted the English
    *word* count from the total non-space *character* count, so every
    letter inside a Latin word was re-counted as a separate symbol token
    (e.g. "hello world" yielded 10 instead of 2).  Leftover symbols are
    now derived from actual character classes.

    Parameters:
        text (str): input text; ``None``/empty returns 0.

    Returns:
        int: estimated token count.
    """
    if not text:
        return 0
    # 1. CJK unified ideographs (ext-A, main block, compatibility block)
    #    plus CJK symbols/punctuation (U+3000-U+303F).
    cjk_count = len(re.findall(r'[\u3400-\u4DBF\u4e00-\u9FFF\uF900-\uFAFF\u3000-\u303F]', text))
    # 2. Japanese kana (hiragana + katakana + phonetic extensions).
    kana_count = len(re.findall(r'[\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF]', text))
    # 3. Korean hangul syllables.
    hangul_count = len(re.findall(r'[\uAC00-\uD7AF]', text))
    # 4. Whitespace-separated words containing ASCII letters/digits: one
    #    token per word, not per character.
    word_count = sum(1 for token in re.split(r'\s+', text) if re.search(r'[a-zA-Z0-9]', token))
    # 5. Remaining symbols: non-space characters not already covered by the
    #    CJK/kana/hangul classes above nor by the ASCII alphanumerics that
    #    make up the counted words.
    alnum_count = len(re.findall(r'[a-zA-Z0-9]', text))
    non_space_count = len(re.findall(r'\S', text))
    other_count = max(0, non_space_count - cjk_count - kana_count - hangul_count - alnum_count)
    return cjk_count + kana_count + hangul_count + word_count + other_count

def fast_token_count(text):
    """Backward-compatible alias for :func:`optimized_fast_token_count`."""
    return optimized_fast_token_count(text)

def count_markdown_elements(text):
    """Count structural Markdown elements (paragraphs, code blocks, tables,
    sentences, headings, links, images) plus an estimated token count."""
    # A paragraph is a chunk of text separated by one or more blank lines.
    chunks = [c for c in re.split(r'(?:\n\s*){2,}', text.strip()) if c.strip()]
    return {
        "段落数量": len(chunks),
        "token数量": fast_token_count(text),
        "代码块数量": len(re.findall(r'```.*?\n.*?```', text, re.DOTALL)),
        "表格数量": len(re.findall(r'\|.*\|.*\n\|[\s\-:]+\|', text)),
        "句子数量": len(re.findall(r'[.!?。！？]["\'\)\]]*\s', text)),
        "标题数量": len(re.findall(r'^#{1,6}\s+.*$', text, re.MULTILINE)),
        "链接数量": len(re.findall(r'\[.*?\]\(.*?\)', text)),
        "图片数量": len(re.findall(r'!\[.*?\]\(.*?\)', text)),
    }

def compare_translations(original, translated):
    """Compare element counts of source vs. translation and report a
    completion rate per metric (100% means counts match exactly)."""
    src_stats = count_markdown_elements(original)
    dst_stats = count_markdown_elements(translated)

    result = {}
    for metric, src_count in src_stats.items():
        dst_count = dst_stats[metric]
        if src_count > 0:
            rate = dst_count / src_count * 100
        elif dst_count == 0:
            # Nothing expected, nothing produced: perfect.
            rate = 100
        else:
            # Elements appeared out of nowhere: flag with infinity.
            rate = float('inf')
        result[metric] = {
            "原文": src_count,
            "译文": dst_count,
            "完整度": f"{rate:.2f}%"
        }
    return result

def evaluate_translation_thread(original_fragment, translated_fragment):
    """Evaluate one translation thread: statistics comparison, element
    preservation checks, and an overall completeness score (0-100)."""
    comparison = compare_translations(original_fragment, translated_fragment)

    # Score every metric except the token count (token totals legitimately
    # change across languages).  Ideal completion is 100%; deviation in
    # either direction is penalised symmetrically.
    score_sum = 0
    scored_metrics = 0
    for metric, values in comparison.items():
        if metric == "token数量":
            continue
        if values["原文"] > 0:
            rate = float(values["完整度"].strip('%'))
            score_sum += 100 - abs(rate - 100)
            scored_metrics += 1

    avg_score = score_sum / scored_metrics if scored_metrics > 0 else 0

    return {
        "统计比较": comparison,
        "元素保留": {
            "代码块保留": check_code_preservation(original_fragment, translated_fragment),
            "链接保留": check_link_preservation(original_fragment, translated_fragment),
            "图片保留": check_image_preservation(original_fragment, translated_fragment),
        },
        "总体得分": f"{avg_score:.2f}"
    }

def check_code_preservation(original, translated):
    """Check whether fenced code blocks survived translation.

    A block counts as preserved when more than 80% of its distinct
    identifiers/operators also appear in the corresponding translated block.
    """
    fence_pattern = r'```(.*?)\n(.*?)```'
    src_blocks = re.findall(fence_pattern, original, re.DOTALL)
    dst_blocks = re.findall(fence_pattern, translated, re.DOTALL)

    if not src_blocks:
        return "无代码块"
    if len(src_blocks) != len(dst_blocks):
        return f"代码块数量不匹配: 原文 {len(src_blocks)} vs 译文 {len(dst_blocks)}"

    # Identifiers and common operators/brackets should be invariant under
    # translation; compare their distinct sets position-wise.
    token_pattern = r'[a-zA-Z_][a-zA-Z0-9_]*|[=\+\-\*/\(\)\{\}\[\]<>]'
    preserved = 0
    for (_, src_code), (_, dst_code) in zip(src_blocks, dst_blocks):
        src_tokens = set(re.findall(token_pattern, src_code))
        dst_tokens = set(re.findall(token_pattern, dst_code))
        if src_tokens and len(src_tokens & dst_tokens) / len(src_tokens) > 0.8:
            preserved += 1

    return f"{preserved / len(src_blocks) * 100:.2f}%"

def check_link_preservation(original, translated):
    """Check whether Markdown link URLs survived translation.

    Link text may legitimately be translated; only the URL of each link,
    compared position-wise, must stay identical.
    """
    link_pattern = r'\[([^\]]*)\]\(([^)]*)\)'
    src_links = re.findall(link_pattern, original)
    dst_links = re.findall(link_pattern, translated)

    if not src_links:
        return "无链接"
    if len(src_links) != len(dst_links):
        return f"链接数量不匹配: 原文 {len(src_links)} vs 译文 {len(dst_links)}"

    kept = sum(
        1 for (_, src_url), (_, dst_url) in zip(src_links, dst_links)
        if src_url == dst_url
    )
    return f"{kept / len(src_links) * 100:.2f}%"

def check_image_preservation(original, translated):
    """Check whether Markdown image URLs survived translation.

    Alt text may be translated; only the URL of each image, compared
    position-wise, must stay identical.
    """
    image_pattern = r'!\[([^\]]*)\]\(([^)]*)\)'
    src_images = re.findall(image_pattern, original)
    dst_images = re.findall(image_pattern, translated)

    if not src_images:
        return "无图片"
    if len(src_images) != len(dst_images):
        return f"图片数量不匹配: 原文 {len(src_images)} vs 译文 {len(dst_images)}"

    kept = sum(
        1 for (_, src_url), (_, dst_url) in zip(src_images, dst_images)
        if src_url == dst_url
    )
    return f"{kept / len(src_images) * 100:.2f}%"

def generate_quality_report(project_folder, file_pairs, book_name=None, split_methods=None, md_file_slices_dict=None, original_texts_dict=None, prefix=''):
    """
    Build a Markdown translation-quality report for a list of
    (original_path, translated_path) file pairs and return the report path.

    NOTE(review): an identical function with the same name is defined again
    later in this module; that later definition shadows this one, so this
    copy is effectively dead code — one of the two should be removed.

    Retries the whole generation up to 3 times; re-raises the last error.
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            reports_dir = os.path.join(project_folder, 'reports')
            os.makedirs(reports_dir, exist_ok=True)
            timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
            # Write to a temp file first; published atomically via os.replace below.
            temp_file = os.path.join(reports_dir, f'temp_{timestamp}')
            if book_name:
                prefixed_name = f'{prefix}{book_name}' if prefix else book_name
                report_file = os.path.join(reports_dir, f'{prefixed_name}_{timestamp}_check0.md')
            else:
                prefixed_name = f'{prefix}{timestamp}' if prefix else timestamp
                report_file = os.path.join(reports_dir, f'{prefixed_name}-translation_quality_report.md')
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write('')
            # Optionally prepend the slice-continuity check section.
            if md_file_slices_dict is not None and original_texts_dict is not None:
                insert_continuity_check_to_report(md_file_slices_dict, original_texts_dict, temp_file)
            with open(temp_file, 'a', encoding='utf-8') as f:
                f.write('# 翻译质量监测报告\n\n')
                if split_methods is not None and len(split_methods) > 0:
                    method_set = set(split_methods)
                    if len(method_set) == 1:
                        f.write(f'**本次切分方法为：{list(method_set)[0]}**\n\n')
                    else:
                        f.write(f'**本次切分方法包括：{"，".join(method_set)}**\n\n')
                f.write('## 总体统计\n\n')
                total_files = len(file_pairs)
                f.write(f'- 总文件数: {total_files}\n\n')
                # One comparison table per file pair; per-file errors are
                # written into the report rather than aborting the run.
                for i, (original_path, translated_path) in enumerate(file_pairs):
                    f.write(f'## 文件 {i+1}: {os.path.basename(original_path)}\n\n')
                    try:
                        with open(original_path, 'r', encoding='utf-8', errors='replace') as orig_f:
                            original_content = orig_f.read()
                        with open(translated_path, 'r', encoding='utf-8', errors='replace') as trans_f:
                            translated_content = trans_f.read()
                        comparison = compare_translations(original_content, translated_content)
                        f.write('| 监测维度 | 原文 | 译文 | 完整度 |\n')
                        f.write('| --- | --- | --- | --- |\n')
                        for key in ["段落数量", "token数量", "句子数量", "标题数量", "链接数量", "图片数量"]:
                            values = comparison.get(key, {"原文":0, "译文":0, "完整度":"0%"})
                            f.write(f"| {key} | {values['原文']} | {values['译文']} | {values['完整度']} |\n")
                        f.write('\n')
                    except Exception as e:
                        f.write(f'分析时出错: {str(e)}\n\n')
            os.replace(temp_file, report_file)
            return report_file
        except Exception as e:
            if attempt == max_retries -1:
                logging.error(f"报告生成失败: {str(e)}")
                raise
            time.sleep(1)
    # NOTE(review): unreachable — the final failed attempt re-raises above,
    # and `report_file` may be unbound if reached.
    return report_file

def evaluate_thread_translations(original_fragments, translated_fragments, report_file):
    """
    Append a per-thread quality-evaluation section to *report_file*.

    NOTE(review): an identical function with the same name is defined again
    later in this module; that later definition shadows this one, so this
    copy is effectively dead code — one of the two should be removed.
    """
    with open(report_file, 'a', encoding='utf-8') as f:
        f.write('\n## 翻译线程质量评估\n\n')
        for i, (orig, trans) in enumerate(zip(original_fragments, translated_fragments)):
            f.write(f'### 线程 {i+1} 翻译质量评估\n\n')
            try:
                evaluation = evaluate_translation_thread(orig, trans)
                f.write('#### 元素统计比较\n\n')
                f.write('| 监测维度 | 原文 | 译文 | 完整度 |\n')
                f.write('| --- | --- | --- | --- |\n')
                # token count intentionally omitted from this table.
                for key in ["段落数量", "句子数量", "标题数量", "链接数量", "图片数量"]:
                    values = evaluation["统计比较"].get(key, {"原文":0, "译文":0, "完整度":"0%"})
                    f.write(f"| {key} | {values['原文']} | {values['译文']} | {values['完整度']} |\n")
                f.write('\n---\n\n')
            except Exception as e:
                f.write(f'分析时出错: {str(e)}\n\n')
    return report_file

def monitor_translation_quality(original_files, translated_files, project_folder, chatbot, book_name):
    """
    Legacy compatibility hook for file-level quality monitoring.

    No standalone report is produced any more; the function simply
    returns an empty string so existing callers keep working.
    """
    return ""

def monitor_thread_translation_quality(original_fragments, translated_fragments, project_folder, chatbot, book_name):
    """
    Legacy compatibility hook for thread-level quality monitoring.

    No standalone report is produced any more; the function simply
    returns an empty string so existing callers keep working.
    """
    return ""

def check_translation_completeness_batch(batch_data, llm_kwargs, chatbot, batch_index):
    """
    Check the translation completeness of one batch of fragments via the LLM.

    Generator: must be driven with ``yield from``; the result dict below is
    delivered as the generator's return value (PEP 380 semantics).

    Parameters:
    - batch_data: list of dicts with keys 'index', 'original', 'translated'
    - llm_kwargs: LLM parameters
    - chatbot: chatbot object, used to refresh the UI
    - batch_index: zero-based index of this batch

    Returns:
    - dict with keys 'batch_index', 'batch_data', 'response', 'prompt'
      (plus 'error': True when all retries failed)
    """
    # Build the evaluation prompt: fixed instructions followed by every
    # original/translated fragment pair in the batch.
    prompt = (
        "你是一位专业的翻译质量评估专家。请分析以下每组原文和译文，判断译文的最后几个句子是否完整准确地翻译了原文的最后几个句子。\n"
        "请严格遵循以下规则：\n"
        "1. 只考虑原文最后几个句子是否被翻译\n"
        "2. 只要译文表达了原文的核心意思，即使措辞不完全一致，也视为正确\n"
        "3. 如果译文有明显的遗漏，视为不正确\n"
        "4. 多余的，不相干的译文，不作为判断依据，忽略掉。只看译文是否完成对原文的翻译覆盖。\n"
        "5. 输出格式必须严格按照以下要求：\n"
        "   ## 第N个切片翻译是否完整\n"
        "   是/否\n"
        "\n"
        "评估内容如下：\n\n"
    )
    
    for item in batch_data:
        prompt += f"## 第{item['index']}个切片\n"
        prompt += f"### 原文：\n{item['original']}\n\n"
        prompt += f"### 译文：\n{item['translated']}\n\n"
    
    # Ask the LLM to evaluate, retrying with exponential backoff on failure.
    max_retries = 3
    retry_delay = 2
    
    for attempt in range(max_retries):
        try:
            batch_msg = f"批次 {batch_index+1}: 正在评估第{batch_data[0]['index']}到第{batch_data[-1]['index']}个切片..."
            chatbot.append((f"正在使用大语言模型评估翻译完整性...", batch_msg))
            yield from update_ui(chatbot=chatbot, history=[])
            
            # Use the project's own LLM bridge, which keeps the UI alive
            # while the request runs in a worker thread.
            response = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs=prompt, 
                inputs_show_user=f"批次 {batch_index+1}: 翻译完整性评估",
                llm_kwargs=llm_kwargs, 
                chatbot=chatbot, 
                history=[],
                sys_prompt="你是一位专业的翻译质量评估专家"
            )
            
            return {
                "batch_index": batch_index,
                "batch_data": batch_data,
                "response": response,
                "prompt": prompt
            }
            
        except Exception as e:
            logging.error(f"批次 {batch_index+1} 翻译完整性评估失败(尝试{attempt+1}/{max_retries}): {str(e)}")
            if attempt < max_retries - 1:
                chatbot.append((f"批次 {batch_index+1} 评估失败，将在{retry_delay}秒后重试...", f"错误信息: {str(e)}"))
                yield from update_ui(chatbot=chatbot, history=[])
                time.sleep(retry_delay)
                retry_delay *= 2  # exponential backoff
            else:
                # Retries exhausted: report the failure in-band so the caller
                # can still assemble a report.
                return {
                    "batch_index": batch_index,
                    "batch_data": batch_data,
                    "response": f"评估失败: {str(e)}",
                    "error": True,
                    "prompt": prompt
                }

def check_translation_completeness(original_fragments, translated_fragments, project_folder, chatbot, book_name, llm_kwargs, prefix=''):
    """
    Check translation completeness, focusing on whether the trailing
    sentences of each fragment were fully translated.

    Generator: must be driven with ``yield from``; the report-file path is
    delivered as the generator's return value.  Fragments are sampled (all
    when there are <= 40, otherwise 40 at random) and sent to the LLM in
    batches of 5.  Despite the batch helper's "thread" naming, batches are
    processed sequentially here.

    Parameters:
    - original_fragments: list of source-text fragments
    - translated_fragments: list of translated fragments
    - project_folder: project folder path
    - chatbot: chatbot object, used to refresh the UI
    - book_name: book name used in the report filename
    - llm_kwargs: LLM parameters
    - prefix: optional report-filename prefix

    Returns:
    - path of the generated report file
    """
    # Create the reports directory under the book's project folder.
    report_dir = os.path.join(project_folder, 'reports')
    if not os.path.exists(report_dir):
        os.makedirs(report_dir)
    
    # Timestamp so repeated runs on the same book never overwrite reports.
    timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    if book_name:
        prefixed_name = f'{prefix}{book_name}' if prefix else book_name
        report_file = os.path.join(report_dir, f'{prefixed_name}_{timestamp}_check1-翻译完整性检查.md')
    else:
        prefixed_name = f'{prefix}{timestamp}' if prefix else timestamp
        report_file = os.path.join(report_dir, f'{prefixed_name}_check1-翻译完整性检查.md')
    
    # Only compare up to the shorter of the two fragment lists.
    min_len = min(len(original_fragments), len(translated_fragments))
    
    # Sampling strategy: evaluate everything when <= 40 slices, otherwise
    # evaluate a sorted random sample of 40.
    sample_indices = list(range(min_len))
    if min_len > 40:
        sample_size = 40
        sample_indices = sorted(random.sample(sample_indices, sample_size))
    else:
        sample_size = min_len
    
    # Prepare the evaluation payload: the tail of each sampled fragment.
    evaluation_data = []
    for idx in sample_indices:
        # Fetch the original and translated fragments.
        original_text = original_fragments[idx]
        translated_text = translated_fragments[idx]
        
        # Naive sentence split on terminal punctuation and newlines.
        orig_sentences = re.split(r'[.!?。！？\n]', original_text)
        orig_sentences = [s.strip() for s in orig_sentences if s.strip()]
        
        trans_sentences = re.split(r'[.!?。！？\n]', translated_text)
        trans_sentences = [s.strip() for s in trans_sentences if s.strip()]
        
        # Take the last 3 source sentences and the last 5 translated ones
        # (extra translated context absorbs sentence-boundary drift).
        last_orig_sentences = orig_sentences[-3:] if len(orig_sentences) >= 3 else orig_sentences
        last_trans_sentences = trans_sentences[-5:] if len(trans_sentences) >= 5 else trans_sentences
        
        last_orig_text = " ".join(last_orig_sentences)
        last_trans_text = " ".join(last_trans_sentences)
        
        evaluation_data.append({
            "index": idx + 1,
            "original": last_orig_text,
            "translated": last_trans_text
        })
    
    # Split the evaluation payload into batches of 5.
    batch_size = 5
    batches = [evaluation_data[i:i+batch_size] for i in range(0, len(evaluation_data), batch_size)]
    
    # Temp file so a partially written report is never published.
    temp_report_file = f"{report_file}.tmp"
    
    # Inner generator that drives every batch and assembles the report.
    def process_all_batches():
        results = []
        for i, batch in enumerate(batches):
            result = yield from check_translation_completeness_batch(batch, llm_kwargs, chatbot, i)
            if result:
                results.append(result)
                
                # Rewrite the temp report after each batch so progress is
                # visible even if a later batch fails.
                with open(temp_report_file, 'w', encoding='utf-8') as f:
                    f.write('# 翻译完整性检查报告\n\n')
                    f.write('## 评估概述\n\n')
                    f.write(f'- 总切片数: {min_len}\n')
                    f.write(f'- 评估样本数: {sample_size}\n')
                    f.write(f'- 评估批次数: {len(batches)}\n')
                    f.write(f'- 已完成批次: {len(results)}/{len(batches)}\n\n')
                    
                    # Aggregate the results of the batches finished so far.
                    f.write('## 大模型评估结果\n\n')
                    for res in sorted(results, key=lambda x: x["batch_index"]):
                        f.write(f"### 批次 {res['batch_index']+1}\n\n")
                        if res.get("error", False):
                            f.write(f"**评估失败**: {res['response']}\n\n")
                        else:
                            f.write(f"{res['response']}\n\n")
        
        # All batches done: write the final report from scratch.
        with open(temp_report_file, 'w', encoding='utf-8') as f:
            f.write('# 翻译完整性检查报告\n\n')
            f.write('## 评估概述\n\n')
            f.write(f'- 总切片数: {min_len}\n')
            f.write(f'- 评估样本数: {sample_size}\n')
            f.write(f'- 评估批次数: {len(batches)}\n\n')
            
            # Aggregate every batch's LLM verdict.
            f.write('## 大模型评估结果\n\n')
            for result in sorted(results, key=lambda x: x["batch_index"]):
                f.write(f"### 批次 {result['batch_index']+1}\n\n")
                if result.get("error", False):
                    f.write(f"**评估失败**: {result['response']}\n\n")
                else:
                    f.write(f"{result['response']}\n\n")
            
            # Include a sample prompt and the raw evaluation data for audit.
            first_prompt = results[0]["prompt"] if results else ""
            f.write('## 喂给大模型的文本示例\n\n')
            f.write(f"```\n{first_prompt}\n```\n\n")
            
            f.write('## 详细评估数据\n\n')
            for result in sorted(results, key=lambda x: x["batch_index"]):
                for item in result['batch_data']:
                    f.write(f"### 第{item['index']}个切片\n")
                    f.write(f"#### 原文：\n{item['original']}\n\n")
                    f.write(f"#### 译文：\n{item['translated']}\n\n")
        
        # Sanity check: a near-empty file means the report is incomplete.
        if os.path.getsize(temp_report_file) < 100:
            raise ValueError("生成的报告文件过小，可能不完整")
        
        # Publish atomically by renaming the temp file.
        os.rename(temp_report_file, report_file)
        
        # Expose the report in the UI download area.
        promote_file_to_downloadzone(report_file)
        
        return report_file
    
    # Drive all batches and surface progress in the chat UI.
    chatbot.append(("开始多线程翻译完整性检查...", f"共 {len(batches)} 个批次，每批次最多 {batch_size} 个切片"))
    yield from update_ui(chatbot=chatbot, history=[])
    
    result_file = yield from process_all_batches()
    
    chatbot.append(("翻译完整性检查完成", f"报告已生成: {os.path.basename(result_file)}"))
    yield from update_ui(chatbot=chatbot, history=[])
    
    return result_file

def get_first_last_sentence(text):
    """Return the (first, last) non-empty sentence of *text*.

    Sentences are split on ``. ! ? 。 ！ ？`` and newlines; returns a pair
    of empty strings when no sentence remains after stripping.
    """
    parts = [p.strip() for p in re.split(r'[.!?。！？\n]', text)]
    parts = [p for p in parts if p]
    if parts:
        return parts[0], parts[-1]
    return '', ''

def diff_preview(a, b, context=40):
    """Render a small two-line preview around the first difference of *a*
    and *b*, with a caret pointing at the diverging position."""
    limit = min(len(a), len(b))
    # Index of the first differing character; equals `limit` when one
    # string is a prefix of the other (or both are identical).
    i = next((j for j in range(limit) if a[j] != b[j]), limit)
    if len(a) <= context * 2 and len(b) <= context * 2:
        # Inputs are short enough to show in full.
        a_snip, b_snip = a, b
        pointer = ' ' * i + '↑'
    else:
        # Show a window of `context` characters on each side of the diff.
        lo = max(0, i - context)
        a_snip = a[lo:i + context]
        b_snip = b[lo:i + context]
        pointer = ' ' * min(i, context) + '↑'
    return f"原文片段:   {a_snip}\n切片内容:   {b_snip}\n           {pointer}"

def similar_ratio(a, b):
    """Return the similarity of two strings in [0, 1] using
    difflib.SequenceMatcher (1.0 means identical)."""
    from difflib import SequenceMatcher
    return SequenceMatcher(None, a, b).ratio()

# 1. Simplified check_slices_continuity: keeps only the required per-slice output.

def check_slices_continuity(slices, original_text):
    """
    Verify that *slices*, concatenated in order, reproduce *original_text*.

    For each slice: compares it with the window of the original text at the
    running offset (similarity >= 0.99 counts as a match), detects up to 30
    leading characters overlapping the previous slice and rewinds the offset
    accordingly, and records a human-readable detail line.

    Returns:
        (ok, details): ok is False when the slices stop before the end of
        the original text or run past its length; details is a
        newline-joined report string.

    NOTE(review): `expected` is taken at the offset *before* the overlap
    rewind for the current slice, so an overlapping slice is compared
    against a window shifted by the overlap — confirm this is intended.
    """
    pos = 0
    details = []
    ok = True
    for idx, s in enumerate(slices):
        # Window of the original text this slice should correspond to.
        expected = original_text[pos:pos+len(s)]
        s_len = len(s)
        e_len = len(expected)
        overall_sim = similar_ratio(s, expected)
        overall_eq = (overall_sim >= 0.99)
        details.append(f"# 切片{idx+1} 整体匹配度: {overall_sim*100:.2f}%  {'✅一致' if overall_eq else '❌不一致'}\n")
        details.append(f"切片连续测试函数:")
        details.append(f"  字数对比: 切片字数: {s_len}，原文片段字数: {e_len}")
        # Overlap detection: longest prefix (up to 30 chars) of this slice
        # that repeats the text immediately before the current offset.
        overlap = 0
        for n in range(min(len(s), 30), 0, -1):
            if pos-n >= 0 and s[:n] == original_text[pos-n:pos]:
                overlap = n
                break
        if overlap > 0:
            details.append(f"  重叠检测: 前{overlap}字符与前一切片重叠，原文[{pos-overlap}:{pos-overlap+len(s)}]")
            # Rewind so the overlapped region is not counted twice.
            pos = pos - overlap
        pos += len(s)
        details.append('---')
    # After consuming all slices the offset must land exactly on the end
    # of the original text; anything else indicates loss or duplication.
    if pos < len(original_text):
        details.append(f"切片连续测试函数: ❌ 切片结束后原文还有剩余内容，可能有遗漏。原文剩余[{pos}:{len(original_text)}]")
        ok = False
    elif pos > len(original_text):
        details.append(f"切片连续测试函数: ❌ 切片总长度超出原文，可能有多余内容。切片累计长度{pos}，原文长度{len(original_text)}")
        ok = False
    return ok, '\n'.join(details)

# 2. Simplified generate_quality_report: keeps only the selected metrics.
#    NOTE: this re-defines (and therefore overrides) the identical
#    generate_quality_report declared earlier in this file.

def generate_quality_report(project_folder, file_pairs, book_name=None, split_methods=None, md_file_slices_dict=None, original_texts_dict=None, prefix=''):
    """
    Build a Markdown translation-quality report for a list of
    (original_path, translated_path) file pairs and return the report path.

    The report contains an optional slice-continuity section, the split
    method(s) used, overall counts, and one comparison table per file pair.
    It is written to a temp file and published atomically via os.replace.
    Generation is retried up to 3 times; the last error is re-raised.

    NOTE(review): this re-definition overrides an identical function of the
    same name defined earlier in this module.

    Fixed: the trailing ``return report_file`` after the retry loop was
    unreachable (the final failed attempt re-raises) and would have raised
    UnboundLocalError if ever reached; it has been removed.
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            reports_dir = os.path.join(project_folder, 'reports')
            os.makedirs(reports_dir, exist_ok=True)
            timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
            # Write to a temp file first; published atomically below.
            temp_file = os.path.join(reports_dir, f'temp_{timestamp}')
            if book_name:
                prefixed_name = f'{prefix}{book_name}' if prefix else book_name
                report_file = os.path.join(reports_dir, f'{prefixed_name}_{timestamp}_check0.md')
            else:
                prefixed_name = f'{prefix}{timestamp}' if prefix else timestamp
                report_file = os.path.join(reports_dir, f'{prefixed_name}-translation_quality_report.md')
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write('')
            # Optionally prepend the slice-continuity check section.
            if md_file_slices_dict is not None and original_texts_dict is not None:
                insert_continuity_check_to_report(md_file_slices_dict, original_texts_dict, temp_file)
            with open(temp_file, 'a', encoding='utf-8') as f:
                f.write('# 翻译质量监测报告\n\n')
                if split_methods is not None and len(split_methods) > 0:
                    method_set = set(split_methods)
                    if len(method_set) == 1:
                        f.write(f'**本次切分方法为：{list(method_set)[0]}**\n\n')
                    else:
                        f.write(f'**本次切分方法包括：{"，".join(method_set)}**\n\n')
                f.write('## 总体统计\n\n')
                total_files = len(file_pairs)
                f.write(f'- 总文件数: {total_files}\n\n')
                # One comparison table per file pair; per-file errors are
                # written into the report rather than aborting the run.
                for i, (original_path, translated_path) in enumerate(file_pairs):
                    f.write(f'## 文件 {i+1}: {os.path.basename(original_path)}\n\n')
                    try:
                        with open(original_path, 'r', encoding='utf-8', errors='replace') as orig_f:
                            original_content = orig_f.read()
                        with open(translated_path, 'r', encoding='utf-8', errors='replace') as trans_f:
                            translated_content = trans_f.read()
                        comparison = compare_translations(original_content, translated_content)
                        f.write('| 监测维度 | 原文 | 译文 | 完整度 |\n')
                        f.write('| --- | --- | --- | --- |\n')
                        for key in ["段落数量", "token数量", "句子数量", "标题数量", "链接数量", "图片数量"]:
                            values = comparison.get(key, {"原文":0, "译文":0, "完整度":"0%"})
                            f.write(f"| {key} | {values['原文']} | {values['译文']} | {values['完整度']} |\n")
                        f.write('\n')
                    except Exception as e:
                        f.write(f'分析时出错: {str(e)}\n\n')
            os.replace(temp_file, report_file)
            return report_file
        except Exception as e:
            if attempt == max_retries - 1:
                logging.error(f"报告生成失败: {str(e)}")
                raise
            time.sleep(1)

# 3. Simplified evaluate_thread_translations: keeps only the selected metrics.
#    NOTE: this re-defines (and therefore overrides) the identical
#    evaluate_thread_translations declared earlier in this file.

def evaluate_thread_translations(original_fragments, translated_fragments, report_file):
    """Append a per-thread quality-evaluation section to *report_file*.

    NOTE: this re-definition overrides an identical function of the same
    name defined earlier in this module.
    """
    # token count intentionally omitted from the per-thread table.
    table_keys = ["段落数量", "句子数量", "标题数量", "链接数量", "图片数量"]
    with open(report_file, 'a', encoding='utf-8') as f:
        f.write('\n## 翻译线程质量评估\n\n')
        for i, (orig, trans) in enumerate(zip(original_fragments, translated_fragments)):
            f.write(f'### 线程 {i+1} 翻译质量评估\n\n')
            try:
                stats = evaluate_translation_thread(orig, trans)["统计比较"]
                f.write('#### 元素统计比较\n\n')
                f.write('| 监测维度 | 原文 | 译文 | 完整度 |\n')
                f.write('| --- | --- | --- | --- |\n')
                for key in table_keys:
                    values = stats.get(key, {"原文":0, "译文":0, "完整度":"0%"})
                    f.write(f"| {key} | {values['原文']} | {values['译文']} | {values['完整度']} |\n")
                f.write('\n---\n\n')
            except Exception as e:
                f.write(f'分析时出错: {str(e)}\n\n')
    return report_file

def insert_continuity_check_to_report(md_file_slices_dict, original_texts_dict, report_file):
    """
    Run the detailed slice-continuity check for every md file and prepend
    the results to *report_file* (the file is created if absent).
    """
    sections = []
    for md_name, slices in md_file_slices_dict.items():
        source_text = original_texts_dict.get(md_name, '')
        ok, detail = check_slices_continuity(slices, source_text)
        status = '✅通过' if ok else '❌未通过'
        sections.append(f"### {md_name}\n{status}\n{detail}\n")
    summary = '# 切片连续性检查结果（逐切片对比，含首末句和字数对比）\n\n' + '\n'.join(sections) + '\n\n'

    # Preserve any existing report content after the new summary.
    old_content = ''
    if os.path.exists(report_file):
        with open(report_file, 'r', encoding='utf-8') as f:
            old_content = f.read()

    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(summary)
        f.write(old_content)
    return report_file

def write_continuity_check_report(md_file_slices_dict, original_texts_dict, book_name=None, project_folder=None, prefix=''):
    """
    Write the slice-continuity check results to a standalone report file
    ('..._check2-切片质量检查.md') and return its path.

    The report has two parts: the per-file continuity summary (via
    check_slices_continuity) and up to 10 sampled slices shown next to
    the original-text window they should correspond to.

    NOTE(review): the *prefix* parameter is accepted but never used in the
    filename here, unlike the other report writers — confirm intent.
    """
    # NOTE(review): redundant local import — time, os, random are already
    # imported at module level.
    import time, os, random
    if project_folder is not None:
        reports_dir = os.path.join(project_folder, 'reports')
    else:
        reports_dir = os.path.join(os.getcwd(), 'reports')
    os.makedirs(reports_dir, exist_ok=True)
    timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    if book_name:
        report_file = os.path.join(reports_dir, f'{book_name}_{timestamp}_check2-切片质量检查.md')
    else:
        report_file = os.path.join(reports_dir, f'{timestamp}_check2-切片质量检查.md')
    
    # Build the main report body: one continuity section per md file.
    results = []
    for md_name, slices in md_file_slices_dict.items():
        original_text = original_texts_dict.get(md_name, '')
        ok, detail = check_slices_continuity(slices, original_text)
        status = '✅通过' if ok else '❌未通过'
        results.append(f"### {md_name}\n{status}\n{detail}\n")
    summary = '# 切片连续性检查结果（逐切片对比，含首末句和字数对比）\n\n' + '\n'.join(results) + '\n\n'
    
    # Add a section comparing up to 10 sampled slices with the original text.
    random_samples = []
    all_slices = []
    
    # Collect every slice together with its file name and position.
    for md_name, slices in md_file_slices_dict.items():
        for i, slice_text in enumerate(slices):
            all_slices.append((md_name, i, slice_text))
    
    # Show everything when there are 10 or fewer slices in total.
    if len(all_slices) <= 10:
        random_samples = all_slices
    else:
        # Spread the samples across the whole book: split the slices into
        # 10 intervals and pick one random slice from each.
        interval_size = max(1, len(all_slices) // 10)  # interval size at least 1
        for i in range(10):
            start_idx = i * interval_size
            end_idx = min((i + 1) * interval_size, len(all_slices))  # stay inside the index range
            if start_idx < end_idx:  # skip empty intervals
                sample_idx = random.randint(start_idx, end_idx - 1)
                random_samples.append(all_slices[sample_idx])
    
    # Render the sampled-slice comparison section.
    random_samples_report = '# 随机抽样切片与原文对比\n\n'
    
    # Only render when at least one sample exists.
    if random_samples:
        for idx, (md_name, slice_idx, slice_text) in enumerate(random_samples):
            original_text = original_texts_dict.get(md_name, '')
            if not original_text:  # skip samples whose original text is missing
                continue
            
            # Locate the slice inside the original text using the same
            # overlap-compensated offset logic as check_slices_continuity.
            slices = md_file_slices_dict[md_name]
            pos = 0
            original_slice = ""  # pre-initialise in case no window is found
            
            # Walk the preceding slices to compute the offset of this one.
            for i in range(slice_idx + 1):
                if i == slice_idx:  # the sampled slice itself
                    # Detect overlap with the previous slice (up to 30 chars).
                    overlap = 0
                    for n in range(min(len(slices[i]), 30), 0, -1):
                        if pos-n >= 0 and slices[i][:n] == original_text[pos-n:pos]:
                            overlap = n
                            break
                    if overlap > 0:
                        pos = pos - overlap
                    
                    # Clamp the window so it never exceeds the text length.
                    if pos < len(original_text):
                        end_pos = min(pos + len(slices[i]), len(original_text))
                        original_slice = original_text[pos:end_pos]
                    break
                else:  # a slice preceding the sampled one
                    # Same overlap detection for the running offset.
                    overlap = 0
                    for n in range(min(len(slices[i]), 30), 0, -1):
                        if i > 0 and pos-n >= 0 and slices[i][:n] == original_text[pos-n:pos]:
                            overlap = n
                            break
                    if overlap > 0:
                        pos = pos - overlap
                    pos += len(slices[i])
            
            # Append this sample to the report.
            random_samples_report += f"## 抽样 {idx+1}: {md_name} 第 {slice_idx+1} 个切片\n\n"
            random_samples_report += f"### 切片内容\n\n```\n{slice_text}\n```\n\n"
            random_samples_report += f"### 对应原文\n\n```\n{original_slice}\n```\n\n"
            random_samples_report += "---\n\n"
    else:
        random_samples_report += "未找到有效的切片样本。\n\n"
    
    # Write the full content to a temp file first, then rename, so readers
    # never observe a half-written report.
    temp_file = f"{report_file}.tmp"
    try:
        with open(temp_file, 'w', encoding='utf-8') as f:
            f.write(summary)
            f.write(random_samples_report)
        
        # Publish by renaming (atomic on the same filesystem).
        if os.path.exists(report_file):
            os.remove(report_file)
        os.rename(temp_file, report_file)
    except Exception as e:
        print(f"写入报告时发生错误: {str(e)}")
        if os.path.exists(temp_file):
            os.remove(temp_file)
    
    return report_file